library(plotly)
library(tidyverse)
library(tigris)
library(censusapi)
library(sf)
library(mapview)
library(leaflet)
library(lehdr)
library(ggplot2)
library(ggthemes)
options(
tigris_class = "sf",
tigris_use_cache = T
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
Updated 05/11/2020 # Mapping distribution of essential workers by block group for San Jose Analysis: Using the CA Essential Business Guidelines,the LODES dataset and the QWI dataset, the % of essential workers (by NAICS code) was mapped to each block group in San Jose.
The data used to assess the % of workers deemed essential was determined using the following approach. The CA Essential proportion of 6-digit NAICS codes in each 4-digit NAICS was assessed here using a data set prepared by the CEE 218Z Vulnerability Team (linked here:https://docs.google.com/spreadsheets/d/1piMnUpohkY-HquAhuKEzeJVhCdmxTX75b8S6mnNjxQY/edit#gid=0). The CEE 218Z team read through the “Essential Critical Infrastructure Order” March 22nd 2020 and manually assigned the “essential”/“nonessential” designation by reading the order and control “F” searching through the list of 6-digit NAICS codes. There were two methods of analysis. Method 1: 4-digit NAICS codes were weighted by fraction of 6-digit NAICS codes in the 4-digit NAICS code. Then this fraction was multiplied by the number of employees in each 4-digit NAICS code (using LODES data). Then proportion of essential workers in each 2-digit NAICS code was calculated by dividing number of essential employees in each 4-digit NAICS code by the number of total employees.
Method 2: Only 2-digit NAICS codes with >= 50% of the 6-digit sub-codes were marked as essential.
Following calculation of the fraction of essential workers in each block group. It was mapped using leaflet.
First get the block groups in San Jose
scc_blockgroups <- block_groups("CA","Santa Clara", cb=F, progress_bar=F)
# Use tracts sent to us by San Jose
dir <- "pCloud Drive/SFBI/Data Library"
sj_tracts <- st_read(file.path(dir,"San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp")) %>%
st_as_sf() %>%
st_transform(st_crs(scc_blockgroups))
## Reading layer `CSJ_Census_Tracts' from data source `/Users/spencerrobinson/pCloud Drive/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp' using driver `ESRI Shapefile'
## Simple feature collection with 219 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## CRS: 2227
sj_citycouncil_disticts <- st_read(file.path(dir, "San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp")) %>%
st_as_sf() %>%
st_transform(st_crs(scc_blockgroups))
## Reading layer `CITY_COUNCIL_DISTRICTS' from data source `/Users/spencerrobinson/pCloud Drive/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp' using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 7 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## CRS: 2227
# from code written by others to get SJ blockgroups
sj_blockgroups <-
scc_blockgroups %>%
st_centroid() %>%
st_join(sj_tracts, left = F) %>%
st_join(sj_citycouncil_disticts%>% dplyr::select(DISTRICTS)) %>%
mutate(
DISTRICTS = DISTRICTS %>% factor(levels = c("1","2","3","4","5","6","7","8","9","10"))
) %>%
st_set_geometry(NULL) %>%
left_join(scc_blockgroups%>% dplyr::select(GEOID), by = "GEOID") %>%
st_as_sf() %>%
dplyr::select(GEOID, DISTRICTS)
# the spatial join leaves off two blockgroups which are touching district 9. The following code assigns those to district 9
sj_blockgroups$DISTRICTS[is.na(sj_blockgroups$DISTRICTS)] <- 9
sj_boundary <-
places("CA", cb=F, progress_bar=F) %>%
filter(NAME == "San Jose")
Next obtain the distribution of workers from the LODES dataset
# define the LODES categories, from https://lehd.ces.census.gov/data/lodes/LODES7/LODESTechDoc7.3.pdf
LODES_variable <- c('CNS01', 'CNS02', 'CNS03', 'CNS04', 'CNS05', 'CNS05', 'CNS05', 'CNS06', 'CNS07', 'CNS07', 'CNS08', 'CNS08', 'CNS09', 'CNS10', 'CNS11', 'CNS12', 'CNS13', 'CNS14', 'CNS15', 'CNS16', 'CNS17', 'CNS18', 'CNS19', 'CNS20')
LODES_NAICS <- c('11', '21', '22', '23', '31', '32', '33', '42', '44', '45', '48', '49', '51', '52', '53', '54', '55', '56', '61', '62', '71', '72', '81', '92')
LODES_keys <- data.frame(LODES_variable, LODES_NAICS)
# get the LODES data
sj_rac <-
grab_lodes(
state = "ca",
year = 2017,
lodes_type = "rac",
job_type = "JT01",
segment = "S000",
state_part = "main",
agg_geo = "bg"
) %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
filter(!is.na(DISTRICTS)) %>%
dplyr::select(-c(contains("CE"),contains("CA"),contains("CR"), contains("CT"),contains("CS"),contains("CD")))
Get QWI Data which tells us number of works in each 4 digit NAICS code
# Get distribution of jobs in Santa Clara County
#Get NAICS codes
label_industry <-
read_csv(file.path(dir,"NAICS/label_industry.csv"))
#GetQWI data
qwi_scc <- NULL
for(years in 2017){
qwi<-
getCensus(
name = "timeseries/qwi/sa",
region = "county:085",
regionin = "state:06",
vars = c("EmpS","EarnS","industry","ind_level"),
time = years
) %>%
filter(ind_level == 4) %>%
mutate(
year = substr(time,1,4)
) %>%
left_join(label_industry, by= "industry") %>%
group_by(year,industry,label) %>%
summarize(
EmpS = round(mean(as.numeric(EmpS), na.rm = TRUE),0),
EarnS = round(mean(as.numeric(EarnS), na.rm = TRUE),0)
) %>%
filter(!is.na(EmpS) & EmpS != 0)
qwi_scc<-
rbind(qwi_scc,qwi)
}
arrange(qwi, desc(EmpS))
## # A tibble: 268 x 5
## # Groups: year, industry [268]
## year industry label EmpS EarnS
## <chr> <chr> <chr> <dbl> <dbl>
## 1 2017 5415 Computer Systems Design and Related Services 74335 15200
## 2 2017 7225 Restaurants and Other Eating Places 54063 2217
## 3 2017 5191 Other Information Services 45339 34325
## 4 2017 3341 Computer and Peripheral Equipment Manufacturing 42593 21186
## 5 2017 3344 Semiconductor and Other Electronic Component Manu… 38376 19610
## 6 2017 6111 Elementary and Secondary Schools 36927 5059
## 7 2017 6221 General Medical and Surgical Hospitals 31975 8206
## 8 2017 6241 Individual and Family Services 24505 1623
## 9 2017 6113 Colleges, Universities, and Professional Schools 23194 8034
## 10 2017 5112 Software Publishers 21594 26285
## # … with 258 more rows
Get Essential Deisgnations
#Here I will use the Vulnerability Team's California Essential Business NAICS code list according to California State Public Health Officer's Report
dir <- "/Users/spencerrobinson/pCloud Drive/SFBI/Data Library"
CA_essential <- read_csv(file.path(dir,'Essential Designations/ca_essential_designations.csv'))
CA_essential<- CA_essential %>%
mutate(`CA Essential` = replace_na(`CA Essential`, FALSE))
CA_essential$`CA Essential` <- as.logical(CA_essential$`CA Essential`)
#Here I use Delaware's Essential Business NAICS code list (only goes to 4 digit)
DE_essential <- read_csv(file.path(dir, 'Essential Designations/delaware_essential_designations.csv'))
Spencer Method 1: Use CA Essential 6-digit NAICS codes to assess Fraction-A: Fraction of 6-digit NAICS codes in each 4-digit NAICS code. Use QWI data to assess Fraction-B: Fraction of 4-digit NAICS code workers in each 2-digit NAICS code category (corresponds to LODES data). To determine % essential workers we multiply Fraction-A by Fraction-B.
CA_essential_weighted_grouped_ca1<- CA_essential %>%
mutate(NAICS_4_dig = str_sub(NAICS, 1,4)) %>%
group_by(NAICS_4_dig, `CA Essential`) %>%
summarize(count = n()) %>%
ungroup() %>%
spread(`CA Essential`,count) %>%
mutate(`FALSE` = replace_na(`FALSE`, 0), `TRUE` = replace_na(`TRUE`, 0)) %>%
rename(c("essential" = "TRUE", "nonessential" = "FALSE")) %>%
mutate(totalCount = essential + nonessential, fracEssential_4dig = essential / totalCount) %>% #FractionA
full_join(qwi_scc, by = c('NAICS_4_dig' = 'industry')) %>% # Bring in QWI data with number of workers 4digit NAICS
mutate(EmpS = replace_na(EmpS,0)) %>% # assume if not listed it is not essential, and if no employees listed that the number is zero
mutate(fracEssential_4dig =replace(fracEssential_4dig, NAICS_4_dig== "8141", 0), EmpS = replace(EmpS, NAICS_4_dig == "8141",0)) %>% #Manually assign NAICS code 8141 as nonessential
mutate(essEmps = EmpS*fracEssential_4dig) %>% #Number of Essenital employees in each 4 digit category
mutate(NAICS_2_dig = str_sub(NAICS_4_dig, 1,2)) %>%
full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>% #Bring in LODES data with
group_by(LODES_variable) %>%
summarise(EmpS = sum(EmpS), essEmps = sum(essEmps)) %>%
mutate(fracEssential_2dig = essEmps/EmpS) %>%
mutate(df = "CA1")
Simone’s Method 1 with DE Essential Business Data: Use the QWI data to find the distribution of workers in NAICS 4-digit codes in Santa Clara County, and use this to weight the contributions to each 2-digit code. Note that this method disregards information about 6 digit codes within the 4 digit code that differ from the broader 4 digit code; this could be adjusted, but without data on the distribution of 6 digit within 4 digit I thought it was best to leave out.
#CA Fraction Employed; DE designations
CA_essential_weighted_grouped_de1 <- DE_essential %>%
mutate(NAICS = str_sub(NAICS, 1,4)) %>%
full_join(qwi_scc, by = c('NAICS' = 'industry')) %>%
mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>%
mutate(EmpS = replace_na(EmpS, 0)) %>% # assume if not listed it is not essential, and if no employees listed that the number is zero
group_by(LODES_variable, Essential) %>%
summarize(totalWorkers = sum(EmpS)) %>% # get number of 4 digit codes within each 2 digit code that are essential and nonessential and total workers essential/nonessential
mutate(Essential = replace_na(Essential, TRUE)) %>% #CNS20 or NAICS code 92 isn't in the DE dataset and is given as NA, but it is public administration and we assume essential
ungroup() %>%
spread(Essential, totalWorkers, fill = 0) %>%
rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>%
mutate(totalCount = Essential + Nonessential, fracEssential_2dig = Essential / totalCount) %>%
mutate(df = "DE") %>%
select("LODES_variable","fracEssential_2dig","df")
#Let's compare the fraction essential for Spencer Method 1 (CA Designations) v. Simone Method 1 (DE Designations)
compare_df <- bind_rows(CA_essential_weighted_grouped_ca1, CA_essential_weighted_grouped_de1)
compare_graph <- ggplot(data = compare_df, aes(x = LODES_variable, y = fracEssential_2dig, fill = df))+
geom_bar(stat = "identity", position = position_dodge())+
ggtitle(" Fraction Employed in Santa Clara; Comparing Delaware and California Designations")+
theme_fivethirtyeight()+
theme(
plot.title = element_text(size=14, face="bold.italic"),
axis.text.x = element_text(angle = 90, size=14),
axis.title.y = element_text(size=14)
)
compare_graph
# get essential breakdown in San Jose - CA designations
sj_rac_essential_ca1 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
left_join(CA_essential_weighted_grouped_ca1 %>% dplyr::select(fracEssential_2dig, LODES_variable), by = c("Category" = "LODES_variable")) %>%
mutate(numEssential = fracEssential_2dig * Number) %>%
group_by(h_bg, C000) %>%
summarize(totalEssential = sum(numEssential)) %>%
ungroup() %>%
mutate(fracEssential_2dig = round((totalEssential / C000)*100, 1))
sj_rac_essential_de1 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
left_join(CA_essential_weighted_grouped_de1 %>% dplyr::select(fracEssential_2dig, LODES_variable), by = c("Category" = "LODES_variable")) %>%
mutate(numEssential = fracEssential_2dig * Number) %>%
group_by(h_bg, C000) %>%
summarize(totalEssential = sum(numEssential)) %>%
ungroup() %>%
mutate(fracEssential_2dig = round((totalEssential / C000)*100, 1))
Map of method 1 using California Essential Business Designations
# set up palette
blue_pal <- colorNumeric(
palette = "Blues",
domain =
c(sj_rac_essential_ca1 %>%
pull(fracEssential_2dig) %>%
unique())
)
ca_map1 <- leaflet(data = sj_rac_essential_ca1) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
sj_rac_essential_ca1 %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~blue_pal(fracEssential_2dig),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential_2dig,"% of workers that are essential by block group"),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
)%>%
addLegend(pal = blue_pal, values = ~fracEssential_2dig, opacity = 1, title = "% Workers in Essential Bus.")
ca_map1
Map of method 1 using Delaware Essential Business Designations
red_pal <- colorNumeric(
palette = "Reds",
domain =
c(sj_rac_essential_de1 %>%
pull(fracEssential_2dig) %>%
unique())
)
de_map1 <- leaflet(data = sj_rac_essential_de1) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
sj_rac_essential_de1 %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~red_pal(fracEssential_2dig),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential_2dig,"% of workers that are essential by block group "),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(pal = red_pal, values = ~fracEssential_2dig, opacity = 1, title = "% Workers in Essential Bus.")
de_map1
Spencer Method 2: Only count as essential the 2 digit NAICS codes with >=80% of the 6 digit sub-codes marked as essential
CA_essential_grouped <- CA_essential %>%
mutate(NAICS = str_sub(NAICS, 1,6)) %>% #Convert 6 digit NAICS to string
mutate(NAICS_2_dig = str_sub(NAICS, 1,2)) %>% #Get 2 digit NAICS which correspond to LODES
full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>%
group_by(LODES_variable, `CA Essential`) %>%
summarize(Count = n()) %>%
spread(`CA Essential`, Count) %>%
mutate(`FALSE` = replace_na(`FALSE`,FALSE), `TRUE` = replace_na(`TRUE`,TRUE)) %>%
rename(c("essential" = "TRUE", "nonessential" = "FALSE")) %>%
mutate(totalCount = essential+nonessential, fracEssential = essential/totalCount) %>%
mutate(df = "CA2")
CA_essential <- ggplot(CA_essential_grouped, aes(fracEssential))+
geom_histogram(binwidth = 0.1)
CA_essential_grouped <- CA_essential_grouped %>% mutate(binaryEssential = fracEssential >= 0.5) # 0.5 used because of low fracEssential values for most NAICS codes
# get essential breakdown in San Jose - CA designations
sj_rac_essential_grouped <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
left_join(CA_essential_grouped %>% dplyr::select(binaryEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>%
filter(binaryEssential == TRUE) %>%
group_by(h_bg, C000) %>%
summarize(totalEssential = sum(Number)) %>%
ungroup() %>%
mutate(fracEssential = round((totalEssential / C000)*100, 1))
essential_workers_visualize <- ggplot(data = sj_rac_essential_grouped, aes(x = fracEssential))+
geom_histogram()+
labs(y = "Number of Block Groups", x = "% Essential Workers")+
ggtitle("Distribution of Block Group Essential Worker Data ")
essential_workers_visualize
Map of method 2 using California Essential Business Designations
# set up palette
blue_pal <- colorNumeric(
palette = "Blues",
domain =
c(sj_rac_essential_grouped %>%
pull(fracEssential) %>%
unique())
)
ca_map2 <- leaflet(data = sj_rac_essential_grouped) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
sj_rac_essential_grouped %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~blue_pal(fracEssential),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential,"%"),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(pal = blue_pal, values = ~fracEssential, opacity = 1, title = "% Workers in Essential Bus.")
ca_map2